Construct Non-Hierarchical P/NBD Model for Online Retail Transaction Data

Author

Mick Cooney

Published

February 16, 2024

In this workbook we construct our first hierarchical P/NBD models on the synthetic data with the longer timeframe.

1 Load and Construct Datasets

We start by modelling the P/NBD model using our synthetic datasets before we try to model real-life data.

1.1 Load Online Retail Data

We now want to load the online retail transaction data.

Code
customer_cohortdata_tbl <- read_rds("data/onlineretail_cohort_tbl.rds")
customer_cohortdata_tbl |> glimpse()
Rows: 5,852
Columns: 5
$ customer_id     <chr> "12346", "12347", "12348", "12349", "12350", "12351", …
$ cohort_qtr      <chr> "2010 Q1", "2010 Q4", "2010 Q3", "2010 Q2", "2011 Q1",…
$ cohort_ym       <chr> "2010 03", "2010 10", "2010 09", "2010 04", "2011 02",…
$ first_tnx_date  <date> 2010-03-02, 2010-10-31, 2010-09-27, 2010-04-29, 2011-…
$ total_tnx_count <int> 3, 8, 5, 3, 1, 1, 9, 2, 1, 2, 6, 2, 5, 10, 6, 4, 10, 2…
Code
customer_transactions_tbl <- read_rds("data/onlineretail_transactions_tbl.rds")
customer_transactions_tbl |> glimpse()
Rows: 36,658
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:59, 2009-12-01 09…
$ orig_id       <chr> "13085", "13085", "13078", "15362", "18102", "12682", "1…
$ customer_id   <fct> 13085, 13085, 13078, 15362, 18102, 12682, 18087, 18087, …
$ invoice_id    <chr> "489434", "489435", "489436", "489437", "489438", "48943…
$ tnx_amount    <dbl> 505.30, 145.80, 630.33, 310.75, 2286.24, 372.30, 50.40, …
Code
customer_subset_id <- read_rds("data/onlineretail_customer_subset_ids.rds")
customer_subset_id |> glimpse()
 Factor w/ 5852 levels "13085","13078",..: 2 3 8 10 14 16 17 21 27 30 ...

1.2 Load Derived Data

Code
customer_summarystats_tbl <- read_rds("data/onlineretail_customer_summarystats_tbl.rds")

obs_fitdata_tbl   <- read_rds("data/onlineretail_obs_fitdata_tbl.rds")
obs_validdata_tbl <- read_rds("data/onlineretail_obs_validdata_tbl.rds")

customer_fit_stats_tbl <- obs_fitdata_tbl |>
  rename(x = tnx_count)

1.3 Load Subset Data

We also want to construct our data subsets for the purposes of speeding up our valuations.

Code
customer_fit_subset_tbl <- obs_fitdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_fit_subset_tbl |> glimpse()
Rows: 1,000
Columns: 6
$ customer_id    <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700,…
$ first_tnx_date <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 0…
$ last_tnx_date  <dttm> 2010-11-30 13:17:59, 2010-09-17 10:36:59, 2010-09-30 1…
$ tnx_count      <dbl> 30, 1, 20, 6, 0, 35, 14, 9, 4, 9, 78, 2, 17, 5, 22, 2, …
$ t_x            <dbl> 52.02500, 41.43740, 43.32966, 51.72589, 0.00000, 52.034…
$ T_cal          <dbl> 52.08869, 52.08849, 52.08433, 52.08274, 52.07847, 52.07…
Code
customer_valid_subset_tbl <- obs_validdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_valid_subset_tbl |> glimpse()
Rows: 1,000
Columns: 3
$ customer_id       <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 177…
$ tnx_count         <dbl> 26, 0, 12, 7, 0, 37, 17, 4, 4, 13, 48, 0, 13, 3, 16,…
$ tnx_last_interval <dbl> 52.950000, NA, 52.931151, 51.780853, NA, 53.089286, …

We now use these datasets to set the start and end dates for our various validation methods.

Code
dates_lst <- read_rds("data/onlineretail_simulation_dates.rds")

use_fit_start_date <- dates_lst$use_fit_start_date
use_fit_end_date   <- dates_lst$use_fit_end_date

use_valid_start_date <- dates_lst$use_valid_start_date
use_valid_end_date   <- dates_lst$use_valid_end_date

We now split out the transaction data into fit and validation datasets.

Code
customer_fit_transactions_tbl <- customer_transactions_tbl |>
  filter(
    customer_id %in% customer_subset_id,
    tnx_timestamp >= use_fit_start_date,
    tnx_timestamp <= use_fit_end_date
    )
  
customer_fit_transactions_tbl |> glimpse()
Rows: 4,164
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 09…
$ orig_id       <chr> "13078", "15362", "14110", "13758", "17592", "13767", "1…
$ customer_id   <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700, …
$ invoice_id    <chr> "489436", "489437", "489443", "489446", "489462", "48946…
$ tnx_amount    <dbl> 630.33, 310.75, 285.06, 996.10, 148.30, 1197.80, 251.10,…
Code
customer_valid_transactions_tbl <- customer_transactions_tbl |>
  filter(
    customer_id %in% customer_subset_id,
    tnx_timestamp >= use_valid_start_date,
    tnx_timestamp <= use_valid_end_date
    )
  
customer_valid_transactions_tbl |> glimpse()
Rows: 3,422
Columns: 5
$ tnx_timestamp <dttm> 2010-12-01 09:32:00, 2010-12-01 09:59:00, 2010-12-01 10…
$ orig_id       <chr> "15291", "16250", "12431", "13408", "13767", "12791", "1…
$ customer_id   <fct> 15291, 16250, 12431, 13408, 13767, 12791, 17908, 16583, …
$ invoice_id    <chr> "536376", "536388", "536389", "536394", "536395", "53640…
$ tnx_amount    <dbl> 657.60, 452.28, 716.50, 2049.36, 1015.76, 355.20, 486.56…

Finally, we want to extract the first transaction for each customer, so we can add this data to assess our models.

Code
customer_initial_tnx_tbl <- customer_fit_transactions_tbl |>
  slice_min(n = 1, order_by = tnx_timestamp, by = customer_id)

customer_initial_tnx_tbl |> glimpse()
Rows: 1,002
Columns: 5
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:08:00, 2009-12-01 09…
$ orig_id       <chr> "13078", "15362", "14110", "13758", "17592", "13767", "1…
$ customer_id   <fct> 13078, 15362, 14110, 13758, 17592, 13767, 17238, 17700, …
$ invoice_id    <chr> "489436", "489437", "489443", "489446", "489462", "48946…
$ tnx_amount    <dbl> 630.33, 310.75, 285.06, 996.10, 148.30, 1197.80, 251.10,…

We now expand out these initial transactions so that we can append them to our simulations.

Code
sim_init_tbl <- customer_initial_tnx_tbl |>
  transmute(
    customer_id,
    draw_id       = list(1:n_sim),
    tnx_timestamp,
    tnx_amount
    ) |>
  unnest(draw_id)

sim_init_tbl |> glimpse()
Rows: 2,004,000
Columns: 4
$ customer_id   <fct> 13078, 13078, 13078, 13078, 13078, 13078, 13078, 13078, …
$ draw_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
$ tnx_timestamp <dttm> 2009-12-01 09:05:59, 2009-12-01 09:05:59, 2009-12-01 09…
$ tnx_amount    <dbl> 630.33, 630.33, 630.33, 630.33, 630.33, 630.33, 630.33, …

Before we start on that, we set a few parameters for the workbook to organise our Stan code.

Code
stan_modeldir <- "stan_models"
stan_codedir  <-   "stan_code"

2 Fit First P/NBD Model

We now construct our Stan model and prepare to fit it with our synthetic dataset.

We also want to set a number of overall parameters for this workbook

To start the fit data, we want to use the 1,000 customers. We also need to calculate the summary statistics for the validation period.

2.1 Compile and Fit Stan Model

We now compile this model using CmdStanR.

Code
pnbd_fixed_stanmodel <- cmdstan_model(
  "stan_code/pnbd_fixed.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

We then use this compiled model with our data to produce a fit of the data.

Code
stan_modelname <- "pnbd_onlineretail_fixed1"
stanfit_seed   <- stanfit_seed + 1
stanfit_prefix <- str_c("fit_", stan_modelname) 

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")

stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    lambda_mn = 0.25,
    lambda_cv = 1.00,
    
    mu_mn     = 0.10,
    mu_cv     = 1.00,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_fixed1_stanfit <- pnbd_fixed_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_onlineretail_fixed1_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_fixed1_stanfit <- read_rds(stanfit_object_file)
}

pnbd_onlineretail_fixed1_stanfit$print()
  variable      mean    median    sd   mad        q5       q95 rhat ess_bulk
 lp__      -68216.39 -68216.50 72.39 74.20 -68334.80 -68096.17 1.00      626
 lambda[1]      0.42      0.40  0.18  0.17      0.18      0.76 1.01     2296
 lambda[2]      0.55      0.54  0.10  0.10      0.40      0.72 1.00     2235
 lambda[3]      0.04      0.03  0.02  0.02      0.01      0.08 1.00     1991
 lambda[4]      1.53      1.52  0.16  0.17      1.27      1.81 1.00     1832
 lambda[5]      0.36      0.35  0.08  0.08      0.24      0.48 1.01     2623
 lambda[6]      0.27      0.26  0.07  0.07      0.17      0.39 1.00     2125
 lambda[7]      0.06      0.05  0.03  0.03      0.02      0.11 1.00     2634
 lambda[8]      0.41      0.40  0.09  0.09      0.26      0.57 1.00     1746
 lambda[9]      0.17      0.16  0.06  0.06      0.09      0.27 1.01     2136
 ess_tail
      892
     1269
     1410
     1148
     1466
     1494
     1355
     1466
     1248
     1197

 # showing 10 of 12718 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_fixed1_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed1-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed1-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed1-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed1-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

2.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_fixed1_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
pnbd_onlineretail_fixed1_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_onlineretail_fixed1_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

2.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_onlineretail_fixed1_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_onlineretail_fixed1_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_fixed1",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1010
  )

pnbd_onlineretail_fixed1_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_fixed1_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_fixed1_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_fixed1_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_fixed1_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_fixed1_assess_valid_simstats_tbl.rds"

2.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed1_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

2.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed1_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

           used  (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells  3741016 199.8   41181721 2199.4  64346438 3436.5
Vcells 91250607 696.2  271018448 2067.8 258543463 1972.6

3 Fit Alternate Prior Model.

We want to try an alternate prior model with a smaller co-efficient of variation to see what impact it has on our procedures.

Code
stan_modelname <- "pnbd_onlineretail_fixed2"
stanfit_seed   <- stanfit_seed + 1
stanfit_prefix <- str_c("fit_", stan_modelname) 

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")

stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    lambda_mn = 0.25,
    lambda_cv = 0.50,
    
    mu_mn     = 0.10,
    mu_cv     = 0.50,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_fixed2_stanfit <- pnbd_fixed_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_onlineretail_fixed2_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_fixed2_stanfit <- read_rds(stanfit_object_file)
}

pnbd_onlineretail_fixed2_stanfit$print()
  variable       mean     median    sd   mad         q5        q95 rhat
 lp__      -144231.01 -144230.00 69.85 69.68 -144346.00 -144117.00 1.00
 lambda[1]       0.34       0.33  0.11  0.11       0.18       0.53 1.00
 lambda[2]       0.50       0.49  0.08  0.08       0.37       0.65 1.00
 lambda[3]       0.08       0.07  0.04  0.03       0.03       0.14 1.01
 lambda[4]       1.31       1.30  0.14  0.15       1.08       1.55 1.00
 lambda[5]       0.34       0.33  0.07  0.06       0.23       0.46 1.00
 lambda[6]       0.26       0.26  0.06  0.06       0.17       0.38 1.00
 lambda[7]       0.09       0.09  0.04  0.03       0.04       0.16 1.00
 lambda[8]       0.39       0.38  0.08  0.08       0.26       0.53 1.00
 lambda[9]       0.19       0.18  0.06  0.06       0.11       0.30 1.00
 ess_bulk ess_tail
      583     1011
     3539     1484
     4188     1531
     3150     1356
     3257     1486
     3888     1376
     3858     1443
     3358     1145
     4363     1272
     3265     1298

 # showing 10 of 12718 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_fixed2_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed2-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed2-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed2-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed2-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

3.1 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_fixed2_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We want to check the \(N_{eff}\) statistics also.

Code
pnbd_onlineretail_fixed2_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_onlineretail_fixed2_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

3.2 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_onlineretail_fixed2_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_onlineretail_fixed2_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_fixed2",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1020
  )

pnbd_onlineretail_fixed2_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_fixed2_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_fixed2_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_fixed2_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_fixed2_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_fixed2_assess_valid_simstats_tbl.rds"

3.2.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed2_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

3.2.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed2_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

            used  (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells   3775149 201.7   37949578 2026.8  64346438 3436.5
Vcells 126854516 967.9  325302281 2481.9 312177068 2381.8

4 Fit Tight-Lifetime Model

We now want to try a model where we use priors with a tighter coefficient of variation for lifetime but keep the CoV for transaction frequency.

Code
stan_modelname <- "pnbd_onlineretail_fixed3"
stanfit_seed   <- stanfit_seed + 1
stanfit_prefix <- str_c("fit_", stan_modelname) 

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    lambda_mn = 0.25,
    lambda_cv = 1.00,
    
    mu_mn     = 0.10,
    mu_cv     = 0.50,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_fixed3_stanfit <- pnbd_fixed_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_onlineretail_fixed3_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_fixed3_stanfit <- read_rds(stanfit_object_file)
}

pnbd_onlineretail_fixed3_stanfit$print()
  variable       mean     median    sd   mad         q5        q95 rhat
 lp__      -112033.61 -112034.00 69.20 69.68 -112145.00 -111916.95 1.01
 lambda[1]       0.42       0.40  0.18  0.17       0.18       0.75 1.00
 lambda[2]       0.55       0.55  0.10  0.09       0.40       0.72 1.00
 lambda[3]       0.04       0.03  0.03  0.02       0.01       0.09 1.00
 lambda[4]       1.54       1.54  0.17  0.17       1.27       1.80 1.00
 lambda[5]       0.36       0.35  0.08  0.08       0.23       0.50 1.01
 lambda[6]       0.27       0.26  0.07  0.07       0.17       0.39 1.00
 lambda[7]       0.06       0.05  0.03  0.03       0.02       0.12 1.00
 lambda[8]       0.42       0.41  0.10  0.10       0.27       0.59 1.00
 lambda[9]       0.17       0.17  0.06  0.05       0.09       0.27 1.00
 ess_bulk ess_tail
      567      952
     2946     1518
     3776     1530
     2311     1023
     2897     1327
     3678     1512
     3096     1582
     3006     1164
     3408     1409
     3508     1367

 # showing 10 of 12718 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_fixed3_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed3-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed3-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed3-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed3-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

4.1 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_fixed3_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We want to check the \(N_{eff}\) statistics also.

Code
pnbd_onlineretail_fixed3_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_onlineretail_fixed3_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

4.2 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_onlineretail_fixed3_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_onlineretail_fixed3_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_fixed3",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1030
  )

pnbd_onlineretail_fixed3_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_fixed3_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_fixed3_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_fixed3_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_fixed3_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_fixed3_assess_valid_simstats_tbl.rds"

4.2.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed3_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

4.2.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed3_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

            used   (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells   3811083  203.6   37422128 1998.6  64346438 3436.5
Vcells 176711155 1348.2  390442737 2978.9 325302280 2481.9

5 Fit Narrow-Short-Lifetime Model

We now want to try a model where we use priors with a tighter coefficient of variation for lifetime but keep the CoV for transaction frequency.

Code
stan_modelname <- "pnbd_onlineretail_fixed4"
stanfit_seed   <- stanfit_seed + 1
stanfit_prefix <- str_c("fit_", stan_modelname) 

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    lambda_mn = 0.25,
    lambda_cv = 1.00,
    
    mu_mn     = 0.20,
    mu_cv     = 0.30,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_onlineretail_fixed4_stanfit <- pnbd_fixed_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_onlineretail_fixed4_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  pnbd_onlineretail_fixed4_stanfit <- read_rds(stanfit_object_file)
}

pnbd_onlineretail_fixed4_stanfit$print()
  variable       mean     median    sd   mad         q5        q95 rhat
 lp__      -184742.88 -184743.00 67.16 68.94 -184850.05 -184631.00 1.01
 lambda[1]       0.43       0.41  0.18  0.18       0.18       0.77 1.00
 lambda[2]       0.55       0.55  0.10  0.10       0.40       0.73 1.01
 lambda[3]       0.04       0.03  0.03  0.02       0.01       0.09 1.00
 lambda[4]       1.55       1.55  0.17  0.18       1.28       1.84 1.00
 lambda[5]       0.36       0.35  0.08  0.08       0.24       0.51 1.00
 lambda[6]       0.27       0.26  0.07  0.07       0.17       0.39 1.00
 lambda[7]       0.06       0.05  0.04  0.03       0.01       0.13 1.00
 lambda[8]       0.42       0.42  0.10  0.09       0.27       0.60 1.00
 lambda[9]       0.18       0.17  0.06  0.06       0.09       0.29 1.00
 ess_bulk ess_tail
      687     1058
     4095     1213
     4053     1312
     3014     1026
     3700     1418
     3162     1338
     4759     1505
     2096     1044
     2641     1444
     3140     1166

 # showing 10 of 12718 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_onlineretail_fixed4_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed4-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed4-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed4-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_onlineretail_fixed4-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

5.1 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_onlineretail_fixed4_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We want to check the \(N_{eff}\) statistics also.

Code
pnbd_onlineretail_fixed4_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_onlineretail_fixed4_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

5.2 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_onlineretail_fixed4_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_onlineretail_fixed4_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_onlineretail_fixed4",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 1040
  )

pnbd_onlineretail_fixed4_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_onlineretail_fixed4_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_onlineretail_fixed4_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_onlineretail_fixed4_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_onlineretail_fixed4_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_onlineretail_fixed4_assess_valid_simstats_tbl.rds"

5.2.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed4_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

5.2.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_onlineretail_fixed4_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

            used   (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells   3845280  205.4   36908387 1971.2  64346438 3436.5
Vcells 220377749 1681.4  390442737 2978.9 390442667 2978.9

6 Compare Model Outputs

We have looked at each of the models individually, but it is also worth looking at each of the models as a group.

We now want to combine both the fit and valid transaction sets to calculate the summary statistics for both.

Code
obs_summstats_tbl <- list(
    fit   = customer_fit_transactions_tbl,
    valid = customer_valid_transactions_tbl
    ) |>
  bind_rows(.id = "assess_type") |>
  group_by(assess_type) |>
  calculate_transaction_summary_statistics() |>
  pivot_longer(
    cols      = !assess_type,
    names_to  = "label",
    values_to = "obs_value"
    )

obs_summstats_tbl |> glimpse()
Rows: 16
Columns: 3
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "v…
$ label       <chr> "p10", "p25", "p50", "p75", "p90", "p99", "total_count", "…
$ obs_value   <dbl> 1.000000, 1.000000, 2.000000, 5.000000, 9.000000, 27.01000…
Code
model_assess_transactions_tbl <- dir_ls("data", regexp = "pnbd_onlineretail_fixed.*_assess_.*index") |>
  enframe(name = NULL, value = "file_path") |>
  mutate(
    model_label = str_replace(file_path, "data/pnbd_onlineretail_(.*?)_assess_.*", "\\1"),
    assess_type = if_else(str_detect(file_path, "_assess_fit_"), "fit", "valid"),
    
    assess_data = map(
      file_path, construct_model_assessment_data,
      
      .progress = "construct_assess_data"
      )
    ) |>
  select(model_label, assess_type, assess_data) |>
  unnest(assess_data)

model_assess_transactions_tbl |> glimpse()
Rows: 34,527,009
Columns: 6
$ model_label   <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed…
$ assess_type   <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", …
$ customer_id   <fct> 13078, 13078, 13078, 13078, 13078, 13078, 13078, 13078, …
$ draw_id       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ tnx_timestamp <dttm> 2009-12-01 12:50:44, 2010-01-10 17:25:42, 2010-01-20 11…
$ tnx_amount    <dbl> 113.06, 154.46, 130.47, 194.81, 104.02, 99.04, 83.73, 23…

We now want to calculate the transaction statistics on this full dataset, for each separate draw.

Code
model_assess_tbl <- model_assess_transactions_tbl |>
  group_by(model_label, assess_type, draw_id) |>
  calculate_transaction_summary_statistics()

model_assess_tbl |> glimpse()
Rows: 16,000
Columns: 11
$ model_label <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed1"…
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "f…
$ draw_id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ p10         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p25         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p50         <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ p75         <dbl> 5.00, 5.00, 6.00, 5.00, 5.00, 5.00, 5.00, 5.00, 6.00, 6.00…
$ p90         <dbl> 9.0, 9.0, 10.0, 10.0, 10.0, 11.0, 9.7, 10.0, 10.0, 10.0, 1…
$ p99         <dbl> 27.58, 26.76, 26.56, 26.33, 24.69, 25.49, 25.00, 26.02, 28…
$ total_count <int> 2812, 2692, 2822, 2680, 2747, 2891, 2847, 2792, 2834, 2817…
$ mean_count  <dbl> 4.373250, 4.391517, 4.529695, 4.542373, 4.346519, 4.677994…

We now combine all this data to create a number of different comparison plots for the various summary statistics.

Code
#! echo: TRUE

create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "total_count", "Total Transactions"
  )

Code
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "mean_count", "Average Transactions per Customer"
  )

Code
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "p99", "99th Percentile Count"
  )

6.1 Write Assessment Data to Disk

We now want to save the assessment data to disk.

Code
model_assess_tbl |> write_rds("data/assess_data_pnbd_onlineretail_fixed_tbl.rds")

R Environment

Code
options(width = 120L)
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16)
 os       Ubuntu 22.04.3 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Dublin
 date     2024-02-16
 pandoc   3.1.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package        * version   date (UTC) lib source
 abind            1.4-5     2016-07-21 [1] RSPM (R 4.3.0)
 arrayhelpers     1.1-0     2020-02-04 [1] RSPM (R 4.3.0)
 backports        1.4.1     2021-12-13 [1] RSPM (R 4.3.0)
 base64enc        0.1-3     2015-07-28 [1] RSPM (R 4.3.0)
 bayesplot      * 1.10.0    2022-11-16 [1] RSPM (R 4.3.0)
 bit              4.0.5     2022-11-15 [1] RSPM (R 4.3.0)
 bit64            4.0.5     2020-08-30 [1] RSPM (R 4.3.0)
 bridgesampling   1.1-2     2021-04-16 [1] RSPM (R 4.3.0)
 brms           * 2.20.4    2023-09-25 [1] RSPM (R 4.3.0)
 Brobdingnag      1.2-9     2022-10-19 [1] RSPM (R 4.3.0)
 cachem           1.0.8     2023-05-01 [1] RSPM (R 4.3.0)
 callr            3.7.3     2022-11-02 [1] RSPM (R 4.3.0)
 checkmate        2.3.0     2023-10-25 [1] RSPM (R 4.3.0)
 cli              3.6.1     2023-03-23 [1] RSPM (R 4.3.0)
 cmdstanr       * 0.7.0     2023-12-13 [1] Github (stan-dev/cmdstanr@7e10703)
 coda             0.19-4    2020-09-30 [1] RSPM (R 4.3.0)
 codetools        0.2-19    2023-02-01 [2] CRAN (R 4.3.1)
 colorspace       2.1-0     2023-01-23 [1] RSPM (R 4.3.0)
 colourpicker     1.3.0     2023-08-21 [1] RSPM (R 4.3.0)
 conflicted     * 1.2.0     2023-02-01 [1] RSPM (R 4.3.0)
 cowplot        * 1.1.1     2020-12-30 [1] RSPM (R 4.3.0)
 crayon           1.5.2     2022-09-29 [1] RSPM (R 4.3.0)
 crosstalk        1.2.0     2021-11-04 [1] RSPM (R 4.3.0)
 curl             5.1.0     2023-10-02 [1] RSPM (R 4.3.0)
 digest           0.6.33    2023-07-07 [1] RSPM (R 4.3.0)
 directlabels   * 2023.8.25 2023-09-01 [1] RSPM (R 4.3.0)
 distributional   0.3.2     2023-03-22 [1] RSPM (R 4.3.0)
 dplyr          * 1.1.3     2023-09-03 [1] RSPM (R 4.3.0)
 DT               0.30      2023-10-05 [1] RSPM (R 4.3.0)
 dygraphs         1.1.1.6   2018-07-11 [1] RSPM (R 4.3.0)
 ellipsis         0.3.2     2021-04-29 [1] RSPM (R 4.3.0)
 evaluate         0.22      2023-09-29 [1] RSPM (R 4.3.0)
 fansi            1.0.5     2023-10-08 [1] RSPM (R 4.3.0)
 farver           2.1.1     2022-07-06 [1] RSPM (R 4.3.0)
 fastmap          1.1.1     2023-02-24 [1] RSPM (R 4.3.0)
 forcats        * 1.0.0     2023-01-29 [1] RSPM (R 4.3.0)
 fs             * 1.6.3     2023-07-20 [1] RSPM (R 4.3.0)
 furrr          * 0.3.1     2022-08-15 [1] RSPM (R 4.3.0)
 future         * 1.33.0    2023-07-01 [1] RSPM (R 4.3.0)
 generics         0.1.3     2022-07-05 [1] RSPM (R 4.3.0)
 ggdist           3.3.0     2023-05-13 [1] RSPM (R 4.3.0)
 ggplot2        * 3.4.4     2023-10-12 [1] RSPM (R 4.3.0)
 globals          0.16.2    2022-11-21 [1] RSPM (R 4.3.0)
 glue           * 1.6.2     2022-02-24 [1] RSPM (R 4.3.0)
 gridExtra        2.3       2017-09-09 [1] RSPM (R 4.3.0)
 gtable           0.3.4     2023-08-21 [1] RSPM (R 4.3.0)
 gtools           3.9.4     2022-11-27 [1] RSPM (R 4.3.0)
 hms              1.1.3     2023-03-21 [1] RSPM (R 4.3.0)
 htmltools        0.5.6.1   2023-10-06 [1] RSPM (R 4.3.0)
 htmlwidgets      1.6.2     2023-03-17 [1] RSPM (R 4.3.0)
 httpuv           1.6.12    2023-10-23 [1] RSPM (R 4.3.0)
 igraph           1.5.1     2023-08-10 [1] RSPM (R 4.3.0)
 inline           0.3.19    2021-05-31 [1] RSPM (R 4.3.0)
 jsonlite         1.8.7     2023-06-29 [1] RSPM (R 4.3.0)
 knitr            1.44      2023-09-11 [1] RSPM (R 4.3.0)
 labeling         0.4.3     2023-08-29 [1] RSPM (R 4.3.0)
 later            1.3.1     2023-05-02 [1] RSPM (R 4.3.0)
 lattice          0.21-8    2023-04-05 [2] CRAN (R 4.3.1)
 lifecycle        1.0.3     2022-10-07 [1] RSPM (R 4.3.0)
 listenv          0.9.0     2022-12-16 [1] RSPM (R 4.3.0)
 lobstr         * 1.1.2     2022-06-22 [1] RSPM (R 4.3.0)
 loo              2.6.0     2023-03-31 [1] RSPM (R 4.3.0)
 lubridate      * 1.9.3     2023-09-27 [1] RSPM (R 4.3.0)
 magrittr       * 2.0.3     2022-03-30 [1] RSPM (R 4.3.0)
 markdown         1.11      2023-10-19 [1] RSPM (R 4.3.0)
 Matrix           1.5-4.1   2023-05-18 [2] CRAN (R 4.3.1)
 matrixStats      1.0.0     2023-06-02 [1] RSPM (R 4.3.0)
 memoise          2.0.1     2021-11-26 [1] RSPM (R 4.3.0)
 mime             0.12      2021-09-28 [1] RSPM (R 4.3.0)
 miniUI           0.1.1.1   2018-05-18 [1] RSPM (R 4.3.0)
 munsell          0.5.0     2018-06-12 [1] RSPM (R 4.3.0)
 mvtnorm          1.2-3     2023-08-25 [1] RSPM (R 4.3.0)
 nlme             3.1-162   2023-01-31 [2] CRAN (R 4.3.1)
 parallelly       1.36.0    2023-05-26 [1] RSPM (R 4.3.0)
 pillar           1.9.0     2023-03-22 [1] RSPM (R 4.3.0)
 pkgbuild         1.4.2     2023-06-26 [1] RSPM (R 4.3.0)
 pkgconfig        2.0.3     2019-09-22 [1] RSPM (R 4.3.0)
 plyr             1.8.9     2023-10-02 [1] RSPM (R 4.3.0)
 posterior      * 1.4.1     2023-03-14 [1] RSPM (R 4.3.0)
 prettyunits      1.2.0     2023-09-24 [1] RSPM (R 4.3.0)
 processx         3.8.2     2023-06-30 [1] RSPM (R 4.3.0)
 promises         1.2.1     2023-08-10 [1] RSPM (R 4.3.0)
 ps               1.7.5     2023-04-18 [1] RSPM (R 4.3.0)
 purrr          * 1.0.2     2023-08-10 [1] RSPM (R 4.3.0)
 quadprog         1.5-8     2019-11-20 [1] RSPM (R 4.3.0)
 QuickJSR         1.0.7     2023-10-15 [1] RSPM (R 4.3.0)
 R6               2.5.1     2021-08-19 [1] RSPM (R 4.3.0)
 Rcpp           * 1.0.11    2023-07-06 [1] RSPM (R 4.3.0)
 RcppParallel     5.1.7     2023-02-27 [1] RSPM (R 4.3.0)
 readr          * 2.1.4     2023-02-10 [1] RSPM (R 4.3.0)
 reshape2         1.4.4     2020-04-09 [1] RSPM (R 4.3.0)
 rlang          * 1.1.1     2023-04-28 [1] RSPM (R 4.3.0)
 rmarkdown        2.25      2023-09-18 [1] RSPM (R 4.3.0)
 rstan            2.32.3    2023-10-15 [1] RSPM (R 4.3.0)
 rstantools       2.3.1.1   2023-07-18 [1] RSPM (R 4.3.0)
 rstudioapi       0.15.0    2023-07-07 [1] RSPM (R 4.3.0)
 scales         * 1.2.1     2022-08-20 [1] RSPM (R 4.3.0)
 sessioninfo      1.2.2     2021-12-06 [1] RSPM (R 4.3.0)
 shiny            1.7.5.1   2023-10-14 [1] RSPM (R 4.3.0)
 shinyjs          2.1.0     2021-12-23 [1] RSPM (R 4.3.0)
 shinystan        2.6.0     2022-03-03 [1] RSPM (R 4.3.0)
 shinythemes      1.2.0     2021-01-25 [1] RSPM (R 4.3.0)
 StanHeaders      2.26.28   2023-09-07 [1] RSPM (R 4.3.0)
 stringi          1.7.12    2023-01-11 [1] RSPM (R 4.3.0)
 stringr        * 1.5.0     2022-12-02 [1] RSPM (R 4.3.0)
 svUnit           1.0.6     2021-04-19 [1] RSPM (R 4.3.0)
 tensorA          0.36.2    2020-11-19 [1] RSPM (R 4.3.0)
 threejs          0.3.3     2020-01-21 [1] RSPM (R 4.3.0)
 tibble         * 3.2.1     2023-03-20 [1] RSPM (R 4.3.0)
 tidybayes      * 3.0.6     2023-08-12 [1] RSPM (R 4.3.0)
 tidyr          * 1.3.0     2023-01-24 [1] RSPM (R 4.3.0)
 tidyselect       1.2.0     2022-10-10 [1] RSPM (R 4.3.0)
 tidyverse      * 2.0.0     2023-02-22 [1] RSPM (R 4.3.0)
 timechange       0.2.0     2023-01-11 [1] RSPM (R 4.3.0)
 tzdb             0.4.0     2023-05-12 [1] RSPM (R 4.3.0)
 utf8             1.2.4     2023-10-22 [1] RSPM (R 4.3.0)
 V8               4.4.0     2023-10-09 [1] RSPM (R 4.3.0)
 vctrs            0.6.4     2023-10-12 [1] RSPM (R 4.3.0)
 vroom            1.6.4     2023-10-02 [1] RSPM (R 4.3.0)
 withr            2.5.1     2023-09-26 [1] RSPM (R 4.3.0)
 xfun             0.40      2023-08-09 [1] RSPM (R 4.3.0)
 xtable           1.8-4     2019-04-21 [1] RSPM (R 4.3.0)
 xts              0.13.1    2023-04-16 [1] RSPM (R 4.3.0)
 yaml             2.3.7     2023-01-23 [1] RSPM (R 4.3.0)
 zoo              1.8-12    2023-04-13 [1] RSPM (R 4.3.0)

 [1] /usr/local/lib/R/site-library
 [2] /usr/local/lib/R/library

──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Code
options(width = 80L)